# Teacher model (knowledge source) — Qwen2.5-7B serves as the teacher.
teacher_model_name = "Qwen/Qwen2.5-7B"  # or substitute a larger teacher model

# Student model (trained on the RTX 4060) — a smaller, 4-bit-quantized model.
student_model_name = "unsloth/Qwen3-4B-unsloth-bnb-4bit"  # or a Qwen3-4B variant

# 初始化学生模型
# Initialize the student model via Unsloth's optimized loader, which returns
# both the model and its tokenizer.
# NOTE(review): assumes `FastLanguageModel` (unsloth) and `torch` are imported
# earlier in this file — confirm when viewing the full source.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=student_model_name,  # 4-bit student checkpoint defined above
    max_seq_length=2048,            # maximum context length (tokens)
    dtype=torch.float16,            # fp16 compute; presumably chosen for the 4060 target — verify bf16 isn't preferable
    load_in_4bit=True,              # load 4-bit quantized weights (model name suggests bitsandbytes "bnb-4bit")
)

# 配置LoRA适配器
# Attach a LoRA adapter to the student model so that only the low-rank
# adapter weights are trained during distillation.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,                # LoRA rank (adapter bottleneck dimension)
    lora_alpha=16,       # LoRA scaling factor; with r=16 the effective scale alpha/r is 1.0
    target_modules=[
        # Adapt all attention projections plus the MLP projections.
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_dropout=0,      # no dropout on adapter activations
    bias="none",         # presumably leaves bias terms untrained (PEFT convention) — verify against peft docs
    use_gradient_checkpointing="unsloth",  # Unsloth's memory-saving checkpointing mode
    random_state=3407,   # seed for reproducible adapter initialization
)